# Load the price table and discard every row that contains a missing value.
# Interactive sanity checks that can be run by hand around this step:
#   head(zillow); summary(zillow); colSums(is.na(zillow))
# NOTE(review): ggplot() below needs ggplot2 attached, and no library() call
# is visible before this point in the script -- confirm it is loaded earlier.
zillow <- read.csv("price.csv")
zillow <- na.omit(zillow)

# Bar chart of the number of remaining rows per State.
ggplot(data = zillow, mapping = aes(x = State)) +
  geom_bar(colour = "blue", fill = "purple", alpha = 0.6)

# Line-and-point plot of the monthly prices for the first `num_city` rows of
# `zillow`, all series drawn in a single panel.
#
# as.yearmon() and zoo() come from the zoo package. The script attaches zoo and
# ggfortify only further down, after their first use in this section, so they
# are attached here before they are needed (library() is harmless if the
# package is already attached).
library("zoo")
library(ggfortify)

num_city <- 10
values <- head(zillow, num_city)
# Columns 7:81 hold the monthly values (the same range the script later
# reshapes into Month/Price). Transpose so each row is a month and each column
# is one of the selected rows of `zillow`.
values <- data.frame(t(as.matrix(values[, 7:81])))
# Column 2 of `zillow` supplies the series names.
colnames(values) <- zillow[1:num_city, 2]
# One entry per month from November 2010 through January 2017; this must have
# as many entries as there are monthly columns (75).
date <- seq(as.Date("2010/11/01"), as.Date("2017/01/31"), "month")
date <- as.yearmon(date)
ts <- zoo(values, order.by = date)
# `values` is replaced by the fortify() output with Index converted to Date;
# the plot itself is drawn from `ts`.
values <- fortify(ts)
values$Index <- as.Date(values$Index)
autoplot(ts, facets = NULL) +
  geom_point(size = 0.5) +
  theme_minimal() +
  labs(x = "Time", y = "Price")

# Reshape the monthly columns (7:81) into long form: the former column name
# goes into `Month` (stored as a factor, because factor_key = TRUE) and the
# cell value goes into `Price`.
price <- gather(data = zillow, "Month", "Price", 7:81, factor_key = TRUE)

# Box plot of Price per Metro for the rows whose State is "CA".
# coord_flip() turns the plot on its side, so the x label ("Metro Area") ends
# up on the vertical axis and the y label ("Price") on the horizontal one.
# No colour label is set in labs(): colour is a fixed value on the geom, not a
# mapped aesthetic, so there is no colour legend to give a title to.
ggplot(price[price$State == "CA" & !is.na(price$Metro), ]) +
  geom_boxplot(aes(x = fct_rev(Metro), y = as.numeric(Price)),
               fill = "#FF9999", color = "#56B4E9", outlier.size = 0.5) +
  labs(x = "Metro Area", y = "Price") +
  coord_flip()

# Box plot of Price per Metro for the rows whose State is "PA".
# The long-format `price` table built earlier in the script is reused as-is:
# `zillow` has not changed since then, so reshaping it again would only
# rebuild the identical table.
# coord_flip() turns the plot on its side, so the x label ("Metro Area") ends
# up on the vertical axis and the y label ("Price") on the horizontal one.
# No colour label is set in labs(): colour is a fixed value on the geom, not a
# mapped aesthetic, so there is no colour legend to give a title to.
ggplot(price[price$State == "PA" & !is.na(price$Metro), ]) +
  geom_boxplot(aes(x = fct_rev(Metro), y = as.numeric(Price)),
               fill = "#9999CC", color = "#66CC99", outlier.size = 0.5) +
  labs(x = "Metro Area", y = "Price") +
  coord_flip()

# price
# Point plot of Price (horizontal) against City (vertical, in reversed factor
# order) for the rows whose County is "Dallas"; points are coloured by Month.
# NOTE(review): the filter matches on County name alone (State is not
# checked) -- confirm that is the intended selection.
ggplot(
  data = price[price$County == "Dallas" & !is.na(price$City), ],
  mapping = aes(x = Price, y = fct_rev(City))
) +
  geom_point(aes(colour = Month), shape = 20, size = 3, alpha = 0.4) +
  scale_colour_discrete(l = 45, h = c(30, 330)) +
  labs(
    title = "Zillow | Dallas County Rent Prices",
    subtitle = "2010 - 2017",
    x = "Price",
    y = "Dallas County",
    colour = "Time"
  )

# Point plot of Price (horizontal) against City (vertical, in reversed factor
# order) for the rows whose County is "Los Angeles"; points are coloured by
# Month using the discrete viridis palette.
ggplot(
  data = price[price$County == "Los Angeles" & !is.na(price$City), ],
  mapping = aes(x = Price, y = fct_rev(City))
) +
  geom_point(aes(colour = Month), shape = 16, size = 3, alpha = 0.4) +
  scale_colour_viridis_d() +
  labs(
    title = "Zillow | Los Angeles County Rent Prices",
    subtitle = "2010 - 2017",
    x = "Price",
    y = "LA County",
    colour = "Time"
  )

# scale_colour_gradientn(colours=rainbow(4))
# Create a numeric Year column from the last four characters of each Month
# label (assumes every label ends in a four-digit year -- TODO confirm against
# the column names of price.csv).
# local() keeps the helper variables out of the global environment.
price$Year <- local({
  month_label <- as.character(price$Month)
  label_end <- nchar(month_label)
  as.numeric(substr(month_label, label_end - 3, label_end))
})

# Per State and Year: mean Price rounded to a whole number, plus the minimum
# and maximum Price. ungroup() removes the State grouping that summarise()
# would otherwise leave on the result.
states <- price[!is.na(price$State), ] %>%
  group_by(State, Year) %>%
  summarise(Mean = round(mean(Price), 0),
            Min = min(Price),
            Max = max(Price)) %>%
  ungroup()
# Plot change over time, by state: one point per State and Year at the mean
# Price. Year is mapped to colour on a black (lowest Year) to purple (highest
# Year) gradient.
# coord_flip() turns the plot on its side, so the x label ("States") ends up
# on the vertical axis and the y label ("Average Price") on the horizontal one.
ggplot(states[!is.na(states$Mean), ],
       aes(x = fct_rev(State), y = Mean)) +
  geom_point(shape = 20, alpha = 0.8, size = 5, aes(colour = Year)) +
  # The scale takes only its gradient endpoints; a mapping (aes()) does not
  # belong in a scale call.
  scale_colour_gradient(low = "black", high = "purple") +
  # Year is mapped to colour (not fill), so the colour bar must be sized via
  # the colour guide for the settings to take effect.
  guides(colour = guide_colourbar(barwidth = 0.7, barheight = 15)) +
  labs(title = "Zillow | Mean US Rent Prices",
       subtitle = "2010 - 2017",
       x = "States", y = "Average Price",
       colour = "Year") +
  coord_flip()

library("zoo")
library(ggfortify)
library(reshape2)
# Monthly series for the first `num_city` rows of `zillow`, drawn in a single
# panel with a hexagonal-bin layer whose fill uses the continuous viridis
# scale.
num_city <- 5
# Take columns 7:81 of the selected rows and transpose them, so each row is a
# month and each column is one of the selected rows; column 2 of `zillow`
# supplies the series names.
values <- data.frame(t(as.matrix(head(zillow, num_city)[, 7:81])))
colnames(values) <- zillow[1:num_city, 2]
# Monthly index from November 2010 through January 2017, as year-month values.
date <- as.yearmon(seq(as.Date("2010/11/01"), as.Date("2017/01/31"), "month"))
ts <- zoo(values, order.by = date)
# `values` is replaced by the fortify() output with Index converted to Date;
# the plot itself is drawn from `ts`.
values <- fortify(ts)
values$Index <- as.Date(values$Index)
autoplot(ts, facets = NULL) +
  geom_hex(size = 1.5, alpha = 0.7) +
  scale_fill_viridis_c() +
  guides(fill = guide_colourbar(barwidth = 0.7, barheight = 15)) +
  labs(x = "Time", y = "Price") +
  theme_minimal()
